library(ggplot2)
library(reshape2)
library(dplyr)
library(tidyr)
library(GGally)
library(grid)
"%&%" = function(a,b) paste(a,b,sep="")
se <- function(x) sqrt(var(x,na.rm=TRUE)/length(is.na(x)==FALSE))
source('/Volumes/im-lab/nas40t2/hwheeler/PrediXcan_CV/GTEx_2014-06013_release/transfers/PrediXmod/Paper_plots/multiplot.R')
my.dir <- '/Volumes/im-lab/nas40t2/hwheeler/cross-tissue/'
fig.dir <- '~/GitHub/GenArch/GenArchPaper/Figures/'
rna.dir <- my.dir %&% "gtex-rnaseq/"
annot.dir <- my.dir %&% "gtex-annot/"
out.dir <- rna.dir %&% "ind-tissues-RPKM/"
##read in SE calculated from 100 perms - Price et al. method
setable <- read.table(my.dir %&% "SE_estimate_from_h2_localonly_reml-no-constrain_allgenes_100perms.txt",header=T)
dgn <- read.table(my.dir %&% 'expArch_DGN-WB_imputedGTs/DGN-WB.h2.all.models_FHSfdr0.05.Chr1-22_globalAll_reml-no-constrain.2015-12-15.txt',header=T)
tislist <- scan(my.dir %&% 'gtex-rnaseq/ind-tissues-from-nick/GTEx_PrediXmod.tissue.list',sep="\n",what="character")
tisspacelist <- scan(my.dir %&% '40.tissue.list',sep="\n",what="character")
table1 <- matrix(0,nrow=length(tislist)+2,ncol=6)
n <- dgn$N[1]
#numexpgenes <- dim(dgn)[1]
numexpgenes <- length(dgn$local.p[is.na(dgn$local.p)==FALSE])
meanh2 <- sprintf("%.3f",mean(dgn$local.h2,na.rm=TRUE))
seperm <- setable$se[1]
meanandse <- meanh2 %&% " (" %&% seperm %&% ")"
pest <- dgn %>% mutate(pchi=pchisq((local.h2/local.se)^2, df=1, lower.tail=FALSE)) %>% mutate(local.q=p.adjust(pchi,method="BH")) %>% arrange(local.h2) %>% mutate(Qlt05=local.q<0.1)
propsig <- sprintf("%.1f",table(pest$Qlt05)[2]/sum(table(pest$Qlt05,useNA="n"))*100)
numsig <- table(pest$Qlt05)[2]
table1[1,] <- c("DGN Whole Blood",n,meanandse,propsig,numsig,numexpgenes)
hist(pest$local.p,main="DGN")

hist(pest$pchi,main="DGN")

#add cross-tissue to table 1
ct <- read.table(my.dir %&% 'cross-tissue.h2.all.models_FHSfdr0.05.Chr1-22_globaleQTLOtherChr_reml-no-constrain.2015-12-14.txt',header=T)
n <- ct$N[1]
#numexpgenes<-dim(ct)[1]
numexpgenes <- length(ct$local.p[is.na(ct$local.p)==FALSE])
meanh2 <- sprintf("%.3f",mean(ct$local.h2,na.rm=TRUE))
seperm <- setable$se[2]
meanandse <- meanh2 %&% " (" %&% seperm %&% ")"
pest <- ct %>% mutate(pchi=pchisq((local.h2/local.se)^2, df=1, lower.tail=FALSE)) %>% mutate(local.q=p.adjust(pchi,method="BH")) %>% arrange(local.h2) %>% mutate(Qlt05=local.q<0.1)
propsig <- sprintf("%.1f",table(pest$Qlt05)[2]/sum(table(pest$Qlt05,useNA="n"))*100)
numsig <- table(pest$Qlt05)[2]
table1[2,] <- c("Cross-tissue",n,meanandse,propsig,numsig,numexpgenes)
hist(pest$local.p,main="CT")

hist(pest$pchi,main="CT")

##calc mean global for DGN
signif(mean(dgn$loc.jt.h2,na.rm=TRUE),3)
## [1] 0.143
signif(se(dgn$loc.jt.h2),3)
## [1] 0.00153
signif(mean(dgn$glo.jt.h2,na.rm=TRUE),3)
## [1] 0.034
signif(se(dgn$glo.jt.h2),3)
## [1] 0.00239
pest <- dgn %>% mutate(glo.jt.P=pchisq((glo.jt.h2/glo.jt.se)^2, df=1, lower.tail=FALSE)) %>% mutate(glo.jt.Q=p.adjust(glo.jt.P,method="BH")) %>% arrange(glo.jt.h2) %>% mutate(Qlt05=glo.jt.Q<0.1)
propsig <- table(pest$Qlt05)[2]/sum(table(pest$Qlt05,useNA="n"))*100
propsig
## TRUE
## 4.905808
table(pest$Qlt05)
##
## FALSE TRUE
## 9692 500
##prop loc
signif(mean(dgn$loc.jt.h2,na.rm=TRUE),3)/(signif(mean(dgn$loc.jt.h2,na.rm=TRUE),3)+signif(mean(dgn$glo.jt.h2,na.rm=TRUE),3))
## [1] 0.8079096
for(i in 1:length(tislist)){
tis <- tislist[i]
data <- read.table(my.dir %&% 'GTEx.TW.' %&% tis %&% '.h2.all.models_FHSfdr0.05.Chr1-22_globaleQTLOtherChr_reml-no-constrain.2015-12-14.txt',header=T,sep="\t")
explist <- scan(out.dir %&% tis %&% ".meanRPKMgt0.1_3samplesRPKMgt0_genelist","c")
data <- dplyr::filter(data,ensid %in% explist)
n <- data$N[1]
#numexpgenes <- dim(data)[1]
numexpgenes <- length(data$local.p[is.na(data$local.p)==FALSE]) ##num expressed genes mean(RPKM)>0.1
meanh2 <- sprintf("%.3f",mean(data$local.h2,na.rm=TRUE))
seperm <- setable$se[i+2]
pest <- data %>% mutate(pchi=pchisq((local.h2/local.se)^2, df=1, lower.tail=FALSE)) %>% mutate(local.q=p.adjust(pchi,method="BH")) %>% arrange(local.h2) %>% mutate(Qlt05=local.q<0.1)
propsig <- sprintf("%.1f",table(pest$Qlt05)[2]/sum(table(pest$Qlt05,useNA="n"))*100)
numsig <- table(pest$Qlt05)[2]
tisspace <- tisspacelist[i]
meanandse <- meanh2 %&% " (" %&% seperm %&% ")"
tableinfo <- c(tisspace,n,meanandse,propsig,numsig,numexpgenes)
table1[i+2,] <- tableinfo
hist(pest$local.p,main=tis)
hist(pest$pchi,main=tis)
}
















































































colnames(table1)=c("tissue","n","mean h2 (SE)","% FDR<0.1","num FDR<0.1","num expressed")
#table1
library(xtable)
tab <- xtable(table1)
print(tab, type="latex",include.rownames=FALSE)
## % latex table generated in R 3.2.0 by xtable 1.8-0 package
## % Wed Aug 10 21:13:04 2016
## \begin{table}[ht]
## \centering
## \begin{tabular}{llllll}
## \hline
## tissue & n & mean h2 (SE) & \% FDR$<$0.1 & num FDR$<$0.1 & num expressed \\
## \hline
## DGN Whole Blood & 922 & 0.149 (0.0039) & 70.6 & 7474 & 10590 \\
## Cross-tissue & 450 & 0.062 (0.017) & 20.2 & 2995 & 14861 \\
## Adipose - Subcutaneous & 298 & 0.038 (0.028) & 24.6 & 2666 & 10837 \\
## Adrenal Gland & 126 & 0.043 (0.018) & 23.9 & 2479 & 10384 \\
## Artery - Aorta & 198 & 0.042 (0.024) & 21.2 & 2385 & 11263 \\
## Artery - Coronary & 119 & 0.037 (0.048) & 22.3 & 2326 & 10427 \\
## Artery - Tibial & 285 & 0.042 (0.02) & 23.9 & 2489 & 10393 \\
## Brain - Anterior cingulate cortex (BA24) & 72 & 0.028 (0.036) & 18.6 & 2235 & 12013 \\
## Brain - Caudate (basal ganglia) & 100 & 0.037 (0.047) & 23.1 & 2521 & 10896 \\
## Brain - Cerebellar Hemisphere & 89 & 0.049 (0.068) & 20.1 & 2294 & 11391 \\
## Brain - Cerebellum & 103 & 0.050 (0.06) & 23.8 & 2646 & 11129 \\
## Brain - Cortex & 96 & 0.045 (0.057) & 24.4 & 2593 & 10636 \\
## Brain - Frontal Cortex (BA9) & 92 & 0.038 (0.077) & 21.8 & 2416 & 11070 \\
## Brain - Hippocampus & 81 & 0.037 (0.05) & 21.4 & 2376 & 11120 \\
## Brain - Hypothalamus & 81 & 0.017 (0.043) & 18.7 & 2238 & 11999 \\
## Brain - Nucleus accumbens (basal ganglia) & 93 & 0.029 (0.04) & 22.6 & 2498 & 11038 \\
## Brain - Putamen (basal ganglia) & 82 & 0.032 (0.064) & 22.8 & 2495 & 10958 \\
## Breast - Mammary Tissue & 183 & 0.029 (0.037) & 22.9 & 2496 & 10894 \\
## Cells - EBV-transformed lymphocytes & 115 & 0.058 (0.1) & 23.5 & 2066 & 8774 \\
## Cells - Transformed fibroblasts & 272 & 0.051 (0.031) & 26.5 & 2552 & 9615 \\
## Colon - Sigmoid & 124 & 0.033 (0.019) & 19.9 & 2298 & 11534 \\
## Colon - Transverse & 170 & 0.036 (0.058) & 21.5 & 2398 & 11152 \\
## Esophagus - Gastroesophageal Junction & 127 & 0.032 (0.039) & 20.9 & 2270 & 10854 \\
## Esophagus - Mucosa & 242 & 0.042 (0.03) & 20.8 & 2432 & 11703 \\
## Esophagus - Muscularis & 219 & 0.039 (0.015) & 21.7 & 2493 & 11499 \\
## Heart - Atrial Appendage & 159 & 0.042 (0.03) & 22.3 & 2327 & 10447 \\
## Heart - Left Ventricle & 190 & 0.034 (0.034) & 20.5 & 2203 & 10742 \\
## Liver & 98 & 0.033 (0.062) & 21.7 & 2230 & 10295 \\
## Lung & 279 & 0.032 (0.034) & 21.4 & 2509 & 11719 \\
## Muscle - Skeletal & 361 & 0.033 (0.03) & 23.4 & 2301 & 9829 \\
## Nerve - Tibial & 256 & 0.052 (0.087) & 27.1 & 2992 & 11048 \\
## Ovary & 85 & 0.037 (0.094) & 25.0 & 2418 & 9690 \\
## Pancreas & 150 & 0.047 (0.024) & 22.4 & 2398 & 10697 \\
## Pituitary & 87 & 0.038 (0.055) & 21.4 & 2527 & 11828 \\
## Skin - Not Sun Exposed (Suprapubic) & 196 & 0.041 (0.034) & 22.9 & 2490 & 10888 \\
## Skin - Sun Exposed (Lower leg) & 303 & 0.039 (0.016) & 23.5 & 2687 & 11442 \\
## Small Intestine - Terminal Ileum & 77 & 0.036 (0.053) & 24.1 & 2591 & 10771 \\
## Spleen & 89 & 0.059 (0.061) & 24.1 & 2451 & 10170 \\
## Stomach & 171 & 0.032 (0.025) & 21.4 & 2338 & 10903 \\
## Testis & 157 & 0.054 (0.044) & 22.0 & 3009 & 13703 \\
## Thyroid & 279 & 0.044 (0.066) & 24.7 & 2838 & 11487 \\
## Whole Blood & 339 & 0.033 (0.023) & 25.4 & 2260 & 8915 \\
## \hline
## \end{tabular}
## \end{table}
Calculate num expressed genes and make lists per tissue for filtering
- Expressed: mean RPKM > 0.1 and at least 3 samples with RPKM > 0
tislist <- scan(my.dir %&% 'tissue.list',sep="\n",what="character")
expidlist <- scan(rna.dir %&% "GTEx_Analysis_2014-06-13.RNA-seq.ID.list","character")
expgenelist <- scan(rna.dir %&% "GTEx_Analysis_2014-06-13.RNA-seq.GENE.list","character")
exp <- readRDS(rna.dir %&% "GTEx_Analysis_2014-06-13.RNA-seq.GENExID.RDS")
expdata <- matrix(exp, ncol=length(expidlist), byrow=T)
t.expdata <- t(expdata)
rownames(t.expdata) <- expidlist
colnames(t.expdata) <- expgenelist
gencodefile <- annot.dir %&% "gencode.v18.genes.patched_contigs.summary.protein"
gencode <- read.table(gencodefile)
rownames(gencode) <- gencode[,5]
t.expdata <- t.expdata[,intersect(colnames(t.expdata),rownames(gencode))] ###pull protein coding gene expression data
sam <- read.table(annot.dir %&% "GTEx_Analysis_2014-06-13.SampleTissue.annot",header=T,sep="\t")
for(i in 1:length(tislist)){
tissue <- tislist[i]
tis<- gsub(' ','',tissue) ##removes all whitespace to match .RDS files
sample <- subset(sam,SMTSD == tissue) ### pull sample list of chosen tissue
tissue.exp <- t.expdata[intersect(rownames(t.expdata),sample$SAMPID),] ###pull expression data for chosen tissue###
tissue.exp <- t(tissue.exp) #for merging in R
explist <- subset(rowMeans(tissue.exp), rowMeans(tissue.exp)>0.1) ###pull genes with mean expression > 0.1###
explist <- names(explist)
nz.expdata <- tissue.exp[explist,]
#calc 10% of sample size
tenpercent <- round(0.1*dim(nz.expdata)[2])
rowtable<-function(x) table(x>0)[[1]] > 2 ##function to determine if >2 samples have exp levels >0
nonbin<-apply(nz.expdata,1,rowtable) ##apply to matrix
gt2.expdata <- nz.expdata[nonbin,] ##remove genes with >10% of RPKM's==0
write.table(rownames(gt2.expdata),file=out.dir %&% tis %&% ".meanRPKMgt0.1_3samplesRPKMgt0_genelist",quote=FALSE,col.names = FALSE,row.names=FALSE)
cat(tis,":",dim(gt2.expdata)[1],"genes\n")
}